import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

def select_region(data: pd.DataFrame, low: float = 2.0, high: float = 3.0) -> pd.DataFrame:
    """Return the check-ins whose ``x`` and ``y`` both lie strictly inside (low, high).

    The result is an independent copy, so callers can add columns to it
    without triggering pandas' ``SettingWithCopyWarning``.
    """
    inside = (
        (data['x'] > low) & (data['x'] < high)
        & (data['y'] > low) & (data['y'] < high)
    )
    return data[inside].copy()


def add_time_features(checkins: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *checkins* with ``hour``, ``day`` and ``weekday`` columns.

    The ``time`` column is interpreted as a number of seconds (``unit='s'``).
    The input frame is left untouched.
    """
    featured = checkins.copy()
    stamps = pd.to_datetime(featured['time'], unit='s')
    featured['hour'] = stamps.dt.hour
    featured['day'] = stamps.dt.day
    featured['weekday'] = stamps.dt.weekday
    return featured


def filter_rare_places(checkins: pd.DataFrame, min_checkins: int = 3) -> pd.DataFrame:
    """Keep only rows whose ``place_id`` has more than *min_checkins* check-ins.

    Check-ins are counted via the non-null ``row_id`` values of each place.
    """
    # Count a single column instead of every column of the frame.
    per_place = checkins.groupby('place_id')['row_id'].count()
    popular_places = per_place[per_place > min_checkins].index
    return checkins[checkins['place_id'].isin(popular_places)]


def main() -> None:
    """Train and evaluate a grid-searched KNN classifier on the check-in data."""
    # 1. Load the data set.
    data = pd.read_csv('./data/FBlocation/train.csv')

    # 2. Basic data processing.
    # 2.1 Narrow the data down to a small spatial window.
    facebook_data = select_region(data)

    # 2.2 Derive time features from the raw timestamp.
    facebook_data = add_time_features(facebook_data)

    # 2.3 Drop places with too few check-ins.
    facebook_data = filter_rare_places(facebook_data)

    # 2.4 Choose the feature matrix and the target.
    x = facebook_data[['x', 'y', 'accuracy', 'day', 'hour', 'weekday']]
    y = facebook_data['place_id']

    # 2.5 Split into training and test sets.
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2, test_size=0.2)

    # 3. Feature engineering: standardisation.
    # The scaler is fitted on the training split only and then reused for the
    # test split, so no test statistics reach the model.
    # NOTE: it is fitted before cross-validation, so the CV folds below share
    # scaling statistics; wrap scaler + estimator in a Pipeline if strictly
    # independent CV scores are required.
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4. Machine learning: KNN with cross-validated grid search.
    param_grid = {'n_neighbors': [3, 5, 7]}
    estimator = GridSearchCV(
        estimator=KNeighborsClassifier(),
        param_grid=param_grid,
        cv=9,
        n_jobs=8,
    )
    estimator.fit(x_train, y_train)

    # 5. Model evaluation.
    # 5.1 Predictions on the test split.
    y_pre = estimator.predict(x_test)
    print('预测值为:\n', y_pre)

    # 5.2 Accuracy on the test split.
    score = estimator.score(x_test, y_test)
    print('准确率:\n', score)

    # 5.3 Grid-search diagnostics.
    print('最好的模型:\n', estimator.best_estimator_)
    print('最好的结果:\n', estimator.best_score_)
    print('整体模型结果:\n', estimator.cv_results_)


if __name__ == '__main__':
    main()



